import pandas as pd
# Location of the pickled DataFrame, fetched over HTTPS from GitHub.
# NOTE(review): unpickling can execute arbitrary code; only load this URL if
# the hosting repository is trusted.
linkData="https://github.com/SocialAnalytics-StrategicIntelligence/TableOperations/raw/main/dengue_ok.pkl"
dengue = pd.read_pickle(linkData)
# checking format
dengue.info()<class 'pandas.core.frame.DataFrame'>
RangeIndex: 501236 entries, 0 to 501235
Data columns (total 9 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 departamento 501236 non-null object
1 provincia 501236 non-null object
2 distrito 501236 non-null object
3 ano 501236 non-null int64
4 semana 501236 non-null int64
5 sexo 501236 non-null object
6 edad 501236 non-null int64
7 enfermedad 501236 non-null category
8 case 501236 non-null int64
dtypes: category(1), int64(4), object(4)
memory usage: 31.1+ MB
# Each row is a person:
dengue.head()| departamento | provincia | distrito | ano | semana | sexo | edad | enfermedad | case | |
|---|---|---|---|---|---|---|---|---|---|
| 0 | HUANUCO | LEONCIO PRADO | LUYANDO | 2000 | 47 | M | 9 | SIN_SEÑALES | 1 |
| 1 | HUANUCO | LEONCIO PRADO | LUYANDO | 2000 | 40 | F | 18 | SIN_SEÑALES | 1 |
| 2 | HUANUCO | LEONCIO PRADO | JOSE CRESPO Y CASTILLO | 2000 | 48 | F | 32 | SIN_SEÑALES | 1 |
| 3 | HUANUCO | LEONCIO PRADO | JOSE CRESPO Y CASTILLO | 2000 | 37 | F | 40 | SIN_SEÑALES | 1 |
| 4 | HUANUCO | LEONCIO PRADO | MARIANO DAMASO BERAUN | 2000 | 42 | M | 16 | SIN_SEÑALES | 1 |
# some exploration
dengue.describe().apply(lambda s: s.apply('{0:.5f}'.format))| ano | semana | edad | case | |
|---|---|---|---|---|
| count | 501236.00000 | 501236.00000 | 501236.00000 | 501236.00000 |
| mean | 2014.77213 | 21.99838 | 28.96143 | 1.00000 |
| std | 6.14646 | 14.76658 | 18.15954 | 0.00000 |
| min | 2000.00000 | 1.00000 | 0.00000 | 1.00000 |
| 25% | 2011.00000 | 11.00000 | 15.00000 | 1.00000 |
| 50% | 2016.00000 | 18.00000 | 26.00000 | 1.00000 |
| 75% | 2020.00000 | 32.00000 | 41.00000 | 1.00000 |
| max | 2022.00000 | 53.00000 | 106.00000 | 1.00000 |
# exploring
dengue.enfermedad.value_counts()| count | |
|---|---|
| enfermedad | |
| SIN_SEÑALES | 443996 |
| ALARMA | 54981 |
| GRAVE | 2259 |
Better labels:
# Text version of the categorical 'enfermedad' column, with each label
# prefixed by a digit (1, 2, 3).
dengue['enfermedad_text'] = dengue.enfermedad.astype(str).replace(
    {
        'SIN_SEÑALES': '1_SIN_SEÑALES',
        'ALARMA': '2_ALARMA',
        'GRAVE': '3_GRAVE',
    }
)
# exploring
dengue.ano.value_counts(sort=False)| count | |
|---|---|
| ano | |
| 2000 | 5557 |
| 2001 | 23526 |
| 2002 | 8086 |
| 2003 | 3349 |
| 2004 | 9547 |
| 2005 | 5640 |
| 2006 | 4022 |
| 2007 | 6344 |
| 2008 | 12824 |
| 2009 | 13407 |
| 2010 | 16842 |
| 2011 | 28084 |
| 2012 | 28505 |
| 2013 | 13092 |
| 2015 | 35816 |
| 2014 | 17234 |
| 2016 | 25160 |
| 2017 | 68279 |
| 2018 | 4698 |
| 2019 | 15287 |
| 2020 | 47932 |
| 2021 | 44791 |
| 2022 | 63214 |
# Keep only the 2018 records. Take an explicit copy so that columns added to
# this frame later are written to an independent DataFrame instead of a slice
# of `dengue` (which triggers pandas' SettingWithCopyWarning).
dengue_2018=dengue[dengue['ano'] == 2018].copy()
dengue_2018.describe()| ano | semana | edad | case | |
|---|---|---|---|---|
| count | 4698.0 | 4698.000000 | 4698.000000 | 4698.0 |
| mean | 2018.0 | 21.658791 | 29.907195 | 1.0 |
| std | 0.0 | 16.756024 | 18.451125 | 0.0 |
| min | 2018.0 | 1.000000 | 1.000000 | 1.0 |
| 25% | 2018.0 | 7.000000 | 15.000000 | 1.0 |
| 50% | 2018.0 | 16.000000 | 27.000000 | 1.0 |
| 75% | 2018.0 | 39.000000 | 42.000000 | 1.0 |
| max | 2018.0 | 52.000000 | 94.000000 | 1.0 |
Discretizing:
# Age brackets produced by pd.cut with these edges (right-closed intervals,
# lowest edge included): [0, 15], (15, 50], (50, 110].
binLimits = [0, 15, 50, 110]
theLabels = ["a_menor_a_16", "b_entre_16y50", "c_mayor_a_50"]

# Ordered categorical column with the age group of every record.
dengue_2018["edad_grupos"] = pd.cut(
    dengue_2018["edad"],
    bins=binLimits,
    labels=theLabels,
    include_lowest=True,
    ordered=True,
)
# see
dengue_2018.head()<ipython-input-14-42147429234e>:3: SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
dengue_2018["edad_grupos"]=pd.cut(dengue_2018['edad'], include_lowest=True,
| departamento | provincia | distrito | ano | semana | sexo | edad | enfermedad | case | enfermedad_text | edad_grupos | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 325313 | PIURA | TALARA | LOS ORGANOS | 2018 | 17 | M | 47 | SIN_SEÑALES | 1 | 1_SIN_SEÑALES | b_entre_16y50 |
| 325314 | LORETO | ALTO AMAZONAS | YURIMAGUAS | 2018 | 45 | M | 20 | SIN_SEÑALES | 1 | 1_SIN_SEÑALES | b_entre_16y50 |
| 325315 | LORETO | ALTO AMAZONAS | YURIMAGUAS | 2018 | 52 | M | 42 | ALARMA | 1 | 2_ALARMA | b_entre_16y50 |
| 325316 | LORETO | MAYNAS | IQUITOS | 2018 | 1 | F | 28 | SIN_SEÑALES | 1 | 1_SIN_SEÑALES | b_entre_16y50 |
| 325317 | MADRE DE DIOS | TAMBOPATA | TAMBOPATA | 2018 | 2 | F | 39 | SIN_SEÑALES | 1 | 1_SIN_SEÑALES | b_entre_16y50 |
The surface:
pd.crosstab( dengue_2018.enfermedad_text,dengue_2018.edad_grupos, dropna=False, normalize='columns')| edad_grupos | a_menor_a_16 | b_entre_16y50 | c_mayor_a_50 |
|---|---|---|---|
| enfermedad_text | |||
| 1_SIN_SEÑALES | 0.769168 | 0.746817 | 0.728907 |
| 2_ALARMA | 0.219413 | 0.238996 | 0.255878 |
| 3_GRAVE | 0.011419 | 0.014187 | 0.015214 |
pd.crosstab(dengue_2018.enfermedad_text,[dengue_2018.sexo,dengue_2018.edad_grupos], dropna=False, normalize='columns')| sexo | F | M | ||||
|---|---|---|---|---|---|---|
| edad_grupos | a_menor_a_16 | b_entre_16y50 | c_mayor_a_50 | a_menor_a_16 | b_entre_16y50 | c_mayor_a_50 |
| enfermedad_text | ||||||
| 1_SIN_SEÑALES | 0.755408 | 0.739556 | 0.711864 | 0.7824 | 0.755957 | 0.745257 |
| 2_ALARMA | 0.231281 | 0.242820 | 0.268362 | 0.2080 | 0.234182 | 0.243902 |
| 3_GRAVE | 0.013311 | 0.017624 | 0.019774 | 0.0096 | 0.009860 | 0.010840 |
# for colab
!pip install altair -U
!pip install "vegafusion-jupyter[embed]"Requirement already satisfied: altair in /usr/local/lib/python3.10/dist-packages (4.2.2)
Collecting altair
Downloading altair-5.4.1-py3-none-any.whl.metadata (9.4 kB)
Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from altair) (3.1.4)
Requirement already satisfied: jsonschema>=3.0 in /usr/local/lib/python3.10/dist-packages (from altair) (4.23.0)
Collecting narwhals>=1.5.2 (from altair)
Downloading narwhals-1.6.0-py3-none-any.whl.metadata (5.8 kB)
Requirement already satisfied: packaging in /usr/local/lib/python3.10/dist-packages (from altair) (24.1)
Requirement already satisfied: typing-extensions>=4.10.0 in /usr/local/lib/python3.10/dist-packages (from altair) (4.12.2)
Requirement already satisfied: attrs>=22.2.0 in /usr/local/lib/python3.10/dist-packages (from jsonschema>=3.0->altair) (24.2.0)
Requirement already satisfied: jsonschema-specifications>=2023.03.6 in /usr/local/lib/python3.10/dist-packages (from jsonschema>=3.0->altair) (2023.12.1)
Requirement already satisfied: referencing>=0.28.4 in /usr/local/lib/python3.10/dist-packages (from jsonschema>=3.0->altair) (0.35.1)
Requirement already satisfied: rpds-py>=0.7.1 in /usr/local/lib/python3.10/dist-packages (from jsonschema>=3.0->altair) (0.20.0)
Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->altair) (2.1.5)
Downloading altair-5.4.1-py3-none-any.whl (658 kB)
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 658.1/658.1 kB 35.8 MB/s eta 0:00:00
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 154.6/154.6 kB 14.9 MB/s eta 0:00:00
Attempting uninstall: altair
Found existing installation: altair 4.2.2
Uninstalling altair-4.2.2:
Successfully uninstalled altair-4.2.2
Successfully installed altair-5.4.1 narwhals-1.6.0
Collecting vegafusion-jupyter[embed]
Downloading vegafusion_jupyter-1.6.9-py3-none-any.whl.metadata (3.7 kB)
Requirement already satisfied: ipywidgets<9,>=7.0.0 in /usr/local/lib/python3.10/dist-packages (from vegafusion-jupyter[embed]) (7.7.1)
Requirement already satisfied: altair>=4.2.0 in /usr/local/lib/python3.10/dist-packages (from vegafusion-jupyter[embed]) (5.4.1)
Collecting vegafusion==1.6.9 (from vegafusion-jupyter[embed])
Downloading vegafusion-1.6.9-py3-none-any.whl.metadata (1.3 kB)
Collecting vegafusion-python-embed==1.6.9 (from vegafusion-jupyter[embed])
Downloading vegafusion_python_embed-1.6.9-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (394 bytes)
Collecting vl-convert-python>=0.7.0 (from vegafusion-jupyter[embed])
Downloading vl_convert_python-1.6.1-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (5.2 kB)
Requirement already satisfied: pyarrow>=5 in /usr/local/lib/python3.10/dist-packages (from vegafusion==1.6.9->vegafusion-jupyter[embed]) (14.0.2)
Requirement already satisfied: pandas in /usr/local/lib/python3.10/dist-packages (from vegafusion==1.6.9->vegafusion-jupyter[embed]) (2.1.4)
Requirement already satisfied: psutil in /usr/local/lib/python3.10/dist-packages (from vegafusion==1.6.9->vegafusion-jupyter[embed]) (5.9.5)
Requirement already satisfied: protobuf in /usr/local/lib/python3.10/dist-packages (from vegafusion==1.6.9->vegafusion-jupyter[embed]) (3.20.3)
Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from altair>=4.2.0->vegafusion-jupyter[embed]) (3.1.4)
Requirement already satisfied: jsonschema>=3.0 in /usr/local/lib/python3.10/dist-packages (from altair>=4.2.0->vegafusion-jupyter[embed]) (4.23.0)
Requirement already satisfied: narwhals>=1.5.2 in /usr/local/lib/python3.10/dist-packages (from altair>=4.2.0->vegafusion-jupyter[embed]) (1.6.0)
Requirement already satisfied: packaging in /usr/local/lib/python3.10/dist-packages (from altair>=4.2.0->vegafusion-jupyter[embed]) (24.1)
Requirement already satisfied: typing-extensions>=4.10.0 in /usr/local/lib/python3.10/dist-packages (from altair>=4.2.0->vegafusion-jupyter[embed]) (4.12.2)
Requirement already satisfied: ipykernel>=4.5.1 in /usr/local/lib/python3.10/dist-packages (from ipywidgets<9,>=7.0.0->vegafusion-jupyter[embed]) (5.5.6)
Requirement already satisfied: ipython-genutils~=0.2.0 in /usr/local/lib/python3.10/dist-packages (from ipywidgets<9,>=7.0.0->vegafusion-jupyter[embed]) (0.2.0)
Requirement already satisfied: traitlets>=4.3.1 in /usr/local/lib/python3.10/dist-packages (from ipywidgets<9,>=7.0.0->vegafusion-jupyter[embed]) (5.7.1)
Requirement already satisfied: widgetsnbextension~=3.6.0 in /usr/local/lib/python3.10/dist-packages (from ipywidgets<9,>=7.0.0->vegafusion-jupyter[embed]) (3.6.8)
Requirement already satisfied: ipython>=4.0.0 in /usr/local/lib/python3.10/dist-packages (from ipywidgets<9,>=7.0.0->vegafusion-jupyter[embed]) (7.34.0)
Requirement already satisfied: jupyterlab-widgets>=1.0.0 in /usr/local/lib/python3.10/dist-packages (from ipywidgets<9,>=7.0.0->vegafusion-jupyter[embed]) (3.0.13)
Requirement already satisfied: jupyter-client in /usr/local/lib/python3.10/dist-packages (from ipykernel>=4.5.1->ipywidgets<9,>=7.0.0->vegafusion-jupyter[embed]) (6.1.12)
Requirement already satisfied: tornado>=4.2 in /usr/local/lib/python3.10/dist-packages (from ipykernel>=4.5.1->ipywidgets<9,>=7.0.0->vegafusion-jupyter[embed]) (6.3.3)
Requirement already satisfied: setuptools>=18.5 in /usr/local/lib/python3.10/dist-packages (from ipython>=4.0.0->ipywidgets<9,>=7.0.0->vegafusion-jupyter[embed]) (71.0.4)
Collecting jedi>=0.16 (from ipython>=4.0.0->ipywidgets<9,>=7.0.0->vegafusion-jupyter[embed])
Using cached jedi-0.19.1-py2.py3-none-any.whl.metadata (22 kB)
Requirement already satisfied: decorator in /usr/local/lib/python3.10/dist-packages (from ipython>=4.0.0->ipywidgets<9,>=7.0.0->vegafusion-jupyter[embed]) (4.4.2)
Requirement already satisfied: pickleshare in /usr/local/lib/python3.10/dist-packages (from ipython>=4.0.0->ipywidgets<9,>=7.0.0->vegafusion-jupyter[embed]) (0.7.5)
Requirement already satisfied: prompt-toolkit!=3.0.0,!=3.0.1,<3.1.0,>=2.0.0 in /usr/local/lib/python3.10/dist-packages (from ipython>=4.0.0->ipywidgets<9,>=7.0.0->vegafusion-jupyter[embed]) (3.0.47)
Requirement already satisfied: pygments in /usr/local/lib/python3.10/dist-packages (from ipython>=4.0.0->ipywidgets<9,>=7.0.0->vegafusion-jupyter[embed]) (2.16.1)
Requirement already satisfied: backcall in /usr/local/lib/python3.10/dist-packages (from ipython>=4.0.0->ipywidgets<9,>=7.0.0->vegafusion-jupyter[embed]) (0.2.0)
Requirement already satisfied: matplotlib-inline in /usr/local/lib/python3.10/dist-packages (from ipython>=4.0.0->ipywidgets<9,>=7.0.0->vegafusion-jupyter[embed]) (0.1.7)
Requirement already satisfied: pexpect>4.3 in /usr/local/lib/python3.10/dist-packages (from ipython>=4.0.0->ipywidgets<9,>=7.0.0->vegafusion-jupyter[embed]) (4.9.0)
Requirement already satisfied: attrs>=22.2.0 in /usr/local/lib/python3.10/dist-packages (from jsonschema>=3.0->altair>=4.2.0->vegafusion-jupyter[embed]) (24.2.0)
Requirement already satisfied: jsonschema-specifications>=2023.03.6 in /usr/local/lib/python3.10/dist-packages (from jsonschema>=3.0->altair>=4.2.0->vegafusion-jupyter[embed]) (2023.12.1)
Requirement already satisfied: referencing>=0.28.4 in /usr/local/lib/python3.10/dist-packages (from jsonschema>=3.0->altair>=4.2.0->vegafusion-jupyter[embed]) (0.35.1)
Requirement already satisfied: rpds-py>=0.7.1 in /usr/local/lib/python3.10/dist-packages (from jsonschema>=3.0->altair>=4.2.0->vegafusion-jupyter[embed]) (0.20.0)
Requirement already satisfied: numpy>=1.16.6 in /usr/local/lib/python3.10/dist-packages (from pyarrow>=5->vegafusion==1.6.9->vegafusion-jupyter[embed]) (1.26.4)
Requirement already satisfied: notebook>=4.4.1 in /usr/local/lib/python3.10/dist-packages (from widgetsnbextension~=3.6.0->ipywidgets<9,>=7.0.0->vegafusion-jupyter[embed]) (6.5.5)
Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->altair>=4.2.0->vegafusion-jupyter[embed]) (2.1.5)
Requirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.10/dist-packages (from pandas->vegafusion==1.6.9->vegafusion-jupyter[embed]) (2.8.2)
Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas->vegafusion==1.6.9->vegafusion-jupyter[embed]) (2024.1)
Requirement already satisfied: tzdata>=2022.1 in /usr/local/lib/python3.10/dist-packages (from pandas->vegafusion==1.6.9->vegafusion-jupyter[embed]) (2024.1)
Requirement already satisfied: parso<0.9.0,>=0.8.3 in /usr/local/lib/python3.10/dist-packages (from jedi>=0.16->ipython>=4.0.0->ipywidgets<9,>=7.0.0->vegafusion-jupyter[embed]) (0.8.4)
Requirement already satisfied: pyzmq<25,>=17 in /usr/local/lib/python3.10/dist-packages (from notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets<9,>=7.0.0->vegafusion-jupyter[embed]) (24.0.1)
Requirement already satisfied: argon2-cffi in /usr/local/lib/python3.10/dist-packages (from notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets<9,>=7.0.0->vegafusion-jupyter[embed]) (23.1.0)
Requirement already satisfied: jupyter-core>=4.6.1 in /usr/local/lib/python3.10/dist-packages (from notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets<9,>=7.0.0->vegafusion-jupyter[embed]) (5.7.2)
Requirement already satisfied: nbformat in /usr/local/lib/python3.10/dist-packages (from notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets<9,>=7.0.0->vegafusion-jupyter[embed]) (5.10.4)
Requirement already satisfied: nbconvert>=5 in /usr/local/lib/python3.10/dist-packages (from notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets<9,>=7.0.0->vegafusion-jupyter[embed]) (6.5.4)
Requirement already satisfied: nest-asyncio>=1.5 in /usr/local/lib/python3.10/dist-packages (from notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets<9,>=7.0.0->vegafusion-jupyter[embed]) (1.6.0)
Requirement already satisfied: Send2Trash>=1.8.0 in /usr/local/lib/python3.10/dist-packages (from notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets<9,>=7.0.0->vegafusion-jupyter[embed]) (1.8.3)
Requirement already satisfied: terminado>=0.8.3 in /usr/local/lib/python3.10/dist-packages (from notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets<9,>=7.0.0->vegafusion-jupyter[embed]) (0.18.1)
Requirement already satisfied: prometheus-client in /usr/local/lib/python3.10/dist-packages (from notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets<9,>=7.0.0->vegafusion-jupyter[embed]) (0.20.0)
Requirement already satisfied: nbclassic>=0.4.7 in /usr/local/lib/python3.10/dist-packages (from notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets<9,>=7.0.0->vegafusion-jupyter[embed]) (1.1.0)
Requirement already satisfied: ptyprocess>=0.5 in /usr/local/lib/python3.10/dist-packages (from pexpect>4.3->ipython>=4.0.0->ipywidgets<9,>=7.0.0->vegafusion-jupyter[embed]) (0.7.0)
Requirement already satisfied: wcwidth in /usr/local/lib/python3.10/dist-packages (from prompt-toolkit!=3.0.0,!=3.0.1,<3.1.0,>=2.0.0->ipython>=4.0.0->ipywidgets<9,>=7.0.0->vegafusion-jupyter[embed]) (0.2.13)
Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.10/dist-packages (from python-dateutil>=2.8.2->pandas->vegafusion==1.6.9->vegafusion-jupyter[embed]) (1.16.0)
Requirement already satisfied: platformdirs>=2.5 in /usr/local/lib/python3.10/dist-packages (from jupyter-core>=4.6.1->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets<9,>=7.0.0->vegafusion-jupyter[embed]) (4.2.2)
Requirement already satisfied: notebook-shim>=0.2.3 in /usr/local/lib/python3.10/dist-packages (from nbclassic>=0.4.7->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets<9,>=7.0.0->vegafusion-jupyter[embed]) (0.2.4)
Requirement already satisfied: lxml in /usr/local/lib/python3.10/dist-packages (from nbconvert>=5->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets<9,>=7.0.0->vegafusion-jupyter[embed]) (4.9.4)
Requirement already satisfied: beautifulsoup4 in /usr/local/lib/python3.10/dist-packages (from nbconvert>=5->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets<9,>=7.0.0->vegafusion-jupyter[embed]) (4.12.3)
Requirement already satisfied: bleach in /usr/local/lib/python3.10/dist-packages (from nbconvert>=5->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets<9,>=7.0.0->vegafusion-jupyter[embed]) (6.1.0)
Requirement already satisfied: defusedxml in /usr/local/lib/python3.10/dist-packages (from nbconvert>=5->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets<9,>=7.0.0->vegafusion-jupyter[embed]) (0.7.1)
Requirement already satisfied: entrypoints>=0.2.2 in /usr/local/lib/python3.10/dist-packages (from nbconvert>=5->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets<9,>=7.0.0->vegafusion-jupyter[embed]) (0.4)
Requirement already satisfied: jupyterlab-pygments in /usr/local/lib/python3.10/dist-packages (from nbconvert>=5->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets<9,>=7.0.0->vegafusion-jupyter[embed]) (0.3.0)
Requirement already satisfied: mistune<2,>=0.8.1 in /usr/local/lib/python3.10/dist-packages (from nbconvert>=5->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets<9,>=7.0.0->vegafusion-jupyter[embed]) (0.8.4)
Requirement already satisfied: nbclient>=0.5.0 in /usr/local/lib/python3.10/dist-packages (from nbconvert>=5->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets<9,>=7.0.0->vegafusion-jupyter[embed]) (0.10.0)
Requirement already satisfied: pandocfilters>=1.4.1 in /usr/local/lib/python3.10/dist-packages (from nbconvert>=5->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets<9,>=7.0.0->vegafusion-jupyter[embed]) (1.5.1)
Requirement already satisfied: tinycss2 in /usr/local/lib/python3.10/dist-packages (from nbconvert>=5->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets<9,>=7.0.0->vegafusion-jupyter[embed]) (1.3.0)
Requirement already satisfied: fastjsonschema>=2.15 in /usr/local/lib/python3.10/dist-packages (from nbformat->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets<9,>=7.0.0->vegafusion-jupyter[embed]) (2.20.0)
Requirement already satisfied: argon2-cffi-bindings in /usr/local/lib/python3.10/dist-packages (from argon2-cffi->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets<9,>=7.0.0->vegafusion-jupyter[embed]) (21.2.0)
Requirement already satisfied: jupyter-server<3,>=1.8 in /usr/local/lib/python3.10/dist-packages (from notebook-shim>=0.2.3->nbclassic>=0.4.7->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets<9,>=7.0.0->vegafusion-jupyter[embed]) (1.24.0)
Requirement already satisfied: cffi>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from argon2-cffi-bindings->argon2-cffi->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets<9,>=7.0.0->vegafusion-jupyter[embed]) (1.17.0)
Requirement already satisfied: soupsieve>1.2 in /usr/local/lib/python3.10/dist-packages (from beautifulsoup4->nbconvert>=5->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets<9,>=7.0.0->vegafusion-jupyter[embed]) (2.6)
Requirement already satisfied: webencodings in /usr/local/lib/python3.10/dist-packages (from bleach->nbconvert>=5->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets<9,>=7.0.0->vegafusion-jupyter[embed]) (0.5.1)
Requirement already satisfied: pycparser in /usr/local/lib/python3.10/dist-packages (from cffi>=1.0.1->argon2-cffi-bindings->argon2-cffi->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets<9,>=7.0.0->vegafusion-jupyter[embed]) (2.22)
Requirement already satisfied: anyio<4,>=3.1.0 in /usr/local/lib/python3.10/dist-packages (from jupyter-server<3,>=1.8->notebook-shim>=0.2.3->nbclassic>=0.4.7->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets<9,>=7.0.0->vegafusion-jupyter[embed]) (3.7.1)
Requirement already satisfied: websocket-client in /usr/local/lib/python3.10/dist-packages (from jupyter-server<3,>=1.8->notebook-shim>=0.2.3->nbclassic>=0.4.7->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets<9,>=7.0.0->vegafusion-jupyter[embed]) (1.8.0)
Requirement already satisfied: idna>=2.8 in /usr/local/lib/python3.10/dist-packages (from anyio<4,>=3.1.0->jupyter-server<3,>=1.8->notebook-shim>=0.2.3->nbclassic>=0.4.7->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets<9,>=7.0.0->vegafusion-jupyter[embed]) (3.8)
Requirement already satisfied: sniffio>=1.1 in /usr/local/lib/python3.10/dist-packages (from anyio<4,>=3.1.0->jupyter-server<3,>=1.8->notebook-shim>=0.2.3->nbclassic>=0.4.7->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets<9,>=7.0.0->vegafusion-jupyter[embed]) (1.3.1)
Requirement already satisfied: exceptiongroup in /usr/local/lib/python3.10/dist-packages (from anyio<4,>=3.1.0->jupyter-server<3,>=1.8->notebook-shim>=0.2.3->nbclassic>=0.4.7->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets<9,>=7.0.0->vegafusion-jupyter[embed]) (1.2.2)
Downloading vegafusion-1.6.9-py3-none-any.whl (54 kB)
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 54.5/54.5 kB 5.7 MB/s eta 0:00:00
Downloading vegafusion_python_embed-1.6.9-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (25.1 MB)
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 25.1/25.1 MB 65.8 MB/s eta 0:00:00
Downloading vl_convert_python-1.6.1-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (29.4 MB)
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 29.4/29.4 MB 17.5 MB/s eta 0:00:00
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 9.8/9.8 MB 100.7 MB/s eta 0:00:00
Installing collected packages: vegafusion-python-embed, vl-convert-python, jedi, vegafusion, vegafusion-jupyter
Successfully installed jedi-0.19.1 vegafusion-1.6.9 vegafusion-jupyter-1.6.9 vegafusion-python-embed-1.6.9 vl-convert-python-1.6.1
!pip install vegafusionRequirement already satisfied: vegafusion in /usr/local/lib/python3.10/dist-packages (1.6.9)
Requirement already satisfied: altair>=5.2.0 in /usr/local/lib/python3.10/dist-packages (from vegafusion) (5.4.1)
Requirement already satisfied: pyarrow>=5 in /usr/local/lib/python3.10/dist-packages (from vegafusion) (14.0.2)
Requirement already satisfied: pandas in /usr/local/lib/python3.10/dist-packages (from vegafusion) (2.1.4)
Requirement already satisfied: psutil in /usr/local/lib/python3.10/dist-packages (from vegafusion) (5.9.5)
Requirement already satisfied: protobuf in /usr/local/lib/python3.10/dist-packages (from vegafusion) (3.20.3)
Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from altair>=5.2.0->vegafusion) (3.1.4)
Requirement already satisfied: jsonschema>=3.0 in /usr/local/lib/python3.10/dist-packages (from altair>=5.2.0->vegafusion) (4.23.0)
Requirement already satisfied: narwhals>=1.5.2 in /usr/local/lib/python3.10/dist-packages (from altair>=5.2.0->vegafusion) (1.6.0)
Requirement already satisfied: packaging in /usr/local/lib/python3.10/dist-packages (from altair>=5.2.0->vegafusion) (24.1)
Requirement already satisfied: typing-extensions>=4.10.0 in /usr/local/lib/python3.10/dist-packages (from altair>=5.2.0->vegafusion) (4.12.2)
Requirement already satisfied: numpy>=1.16.6 in /usr/local/lib/python3.10/dist-packages (from pyarrow>=5->vegafusion) (1.26.4)
Requirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.10/dist-packages (from pandas->vegafusion) (2.8.2)
Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas->vegafusion) (2024.1)
Requirement already satisfied: tzdata>=2022.1 in /usr/local/lib/python3.10/dist-packages (from pandas->vegafusion) (2024.1)
Requirement already satisfied: attrs>=22.2.0 in /usr/local/lib/python3.10/dist-packages (from jsonschema>=3.0->altair>=5.2.0->vegafusion) (24.2.0)
Requirement already satisfied: jsonschema-specifications>=2023.03.6 in /usr/local/lib/python3.10/dist-packages (from jsonschema>=3.0->altair>=5.2.0->vegafusion) (2023.12.1)
Requirement already satisfied: referencing>=0.28.4 in /usr/local/lib/python3.10/dist-packages (from jsonschema>=3.0->altair>=5.2.0->vegafusion) (0.35.1)
Requirement already satisfied: rpds-py>=0.7.1 in /usr/local/lib/python3.10/dist-packages (from jsonschema>=3.0->altair>=5.2.0->vegafusion) (0.20.0)
Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.10/dist-packages (from python-dateutil>=2.8.2->pandas->vegafusion) (1.16.0)
Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->altair>=5.2.0->vegafusion) (2.1.5)
import altair as alt
# Disable any custom data transformer and use the default one
alt.data_transformers.enable('default')
DataTransformerRegistry.enable('default')
# Base chart bound to the 2018 records.
alt_dengue=alt.Chart(dengue_2018)
# Mean age per week, one colour per severity label.
# 'semana' is an integer week number, not a timestamp, so it is encoded as
# quantitative; ':T' would make Vega-Lite read 1..53 as milliseconds since
# the Unix epoch.
enc_dengue=alt_dengue.encode(
    x='semana:Q',
    y='mean(edad):Q',
    color='enfermedad_text:N',
)
enc_dengue.mark_line() + enc_dengue.mark_errorband()/usr/local/lib/python3.10/dist-packages/altair/utils/core.py:384: FutureWarning: the convert_dtype parameter is deprecated and will be removed in a future version. Do ``ser.astype(object).apply()`` instead if you want ``convert_dtype=False``.
elif dtype_name == "string":
/usr/local/lib/python3.10/dist-packages/altair/utils/core.py:384: FutureWarning: the convert_dtype parameter is deprecated and will be removed in a future version. Do ``ser.astype(object).apply()`` instead if you want ``convert_dtype=False``.
elif dtype_name == "string":
/usr/local/lib/python3.10/dist-packages/altair/utils/core.py:384: FutureWarning: the convert_dtype parameter is deprecated and will be removed in a future version. Do ``ser.astype(object).apply()`` instead if you want ``convert_dtype=False``.
elif dtype_name == "string":
More detailed:
# Median age per week, one colour per severity label, with tooltips.
# 'semana' (week number) and 'ano' (year) are plain integers, not timestamps:
# ':T' would interpret them as milliseconds since the Unix epoch, so the week
# is encoded as quantitative and the year as ordinal.
enc_dengue=alt_dengue.encode(
    x='semana:Q',
    y='median(edad):Q',
    color='enfermedad_text:N',
    tooltip=['median(edad)','ano:O']
).interactive()
enc_dengue.mark_line().facet(
row='sexo:N',
column='edad_grupos:N'
)/usr/local/lib/python3.10/dist-packages/altair/utils/core.py:384: FutureWarning: the convert_dtype parameter is deprecated and will be removed in a future version. Do ``ser.astype(object).apply()`` instead if you want ``convert_dtype=False``.
elif dtype_name == "string":
/usr/local/lib/python3.10/dist-packages/altair/utils/core.py:384: FutureWarning: the convert_dtype parameter is deprecated and will be removed in a future version. Do ``ser.astype(object).apply()`` instead if you want ``convert_dtype=False``.
elif dtype_name == "string":
/usr/local/lib/python3.10/dist-packages/altair/utils/core.py:384: FutureWarning: the convert_dtype parameter is deprecated and will be removed in a future version. Do ``ser.astype(object).apply()`` instead if you want ``convert_dtype=False``.
elif dtype_name == "string":
# Number of cases per week (sum of the 'case' column), one colour per
# severity label, with tooltips.
# 'semana' (week number) and 'ano' (year) are plain integers, not timestamps:
# ':T' would interpret them as milliseconds since the Unix epoch, so the week
# is encoded as quantitative and the year as ordinal.
enc_dengue=alt_dengue.encode(
    x='semana:Q',
    y=alt.Y('sum(case):Q'),
    color='enfermedad_text:N',
    tooltip=['sum(case):Q','ano:O']
).interactive()
enc_dengue.mark_line().facet(
row='sexo:N',
column='edad_grupos:N'
)/usr/local/lib/python3.10/dist-packages/altair/utils/core.py:384: FutureWarning: the convert_dtype parameter is deprecated and will be removed in a future version. Do ``ser.astype(object).apply()`` instead if you want ``convert_dtype=False``.
elif dtype_name == "string":
/usr/local/lib/python3.10/dist-packages/altair/utils/core.py:384: FutureWarning: the convert_dtype parameter is deprecated and will be removed in a future version. Do ``ser.astype(object).apply()`` instead if you want ``convert_dtype=False``.
elif dtype_name == "string":
/usr/local/lib/python3.10/dist-packages/altair/utils/core.py:384: FutureWarning: the convert_dtype parameter is deprecated and will be removed in a future version. Do ``ser.astype(object).apply()`` instead if you want ``convert_dtype=False``.
elif dtype_name == "string":
The previous plot may be easier to read with a log-scaled Y-axis:
# Number of cases per week on a logarithmic Y scale, one colour per severity
# label, with tooltips.
# 'semana' (week number) and 'ano' (year) are plain integers, not timestamps:
# ':T' would interpret them as milliseconds since the Unix epoch, so the week
# is encoded as quantitative and the year as ordinal.
enc_dengue=alt_dengue.encode(
    x='semana:Q',
    y=alt.Y('sum(case):Q', scale=alt.Scale(type='log')),
    color='enfermedad_text:N',
    tooltip=['sum(case):Q','ano:O']
).interactive()
enc_dengue.mark_line().facet(
row='sexo:N',
column='edad_grupos:N'
)/usr/local/lib/python3.10/dist-packages/altair/utils/core.py:384: FutureWarning: the convert_dtype parameter is deprecated and will be removed in a future version. Do ``ser.astype(object).apply()`` instead if you want ``convert_dtype=False``.
elif dtype_name == "string":
/usr/local/lib/python3.10/dist-packages/altair/utils/core.py:384: FutureWarning: the convert_dtype parameter is deprecated and will be removed in a future version. Do ``ser.astype(object).apply()`` instead if you want ``convert_dtype=False``.
elif dtype_name == "string":
/usr/local/lib/python3.10/dist-packages/altair/utils/core.py:384: FutureWarning: the convert_dtype parameter is deprecated and will be removed in a future version. Do ``ser.astype(object).apply()`` instead if you want ``convert_dtype=False``.
elif dtype_name == "string":
Let's get the same results in tables:
# Median age for every (age group, week, sex, severity) combination.
indexList=['edad_grupos','semana','sexo','enfermedad_text']
aggregator={'edad': ['median']}
# 'edad_grupos' was only ever added to dengue_2018, so grouping `dengue`
# directly raises KeyError when the notebook runs top to bottom. Build the
# column here from the same bins and labels before grouping; if `dengue`
# already carries the column, it is simply recomputed with identical values.
LevelByYear_medians=(
    dengue
    .assign(
        edad_grupos=pd.cut(
            dengue['edad'],
            bins=binLimits,
            labels=theLabels,
            include_lowest=True,
            ordered=True,
        )
    )
    # observed=True: keep only category combinations present in the data.
    .groupby(indexList,observed=True)
    .agg(aggregator)
)
LevelByYear_medians| edad | ||||
|---|---|---|---|---|
| median | ||||
| edad_grupos | semana | sexo | enfermedad_text | |
| a_menor_a_16 | 1 | F | 1_SIN_SEÑALES | 9.0 |
| 2_ALARMA | 9.0 | |||
| 3_GRAVE | 5.0 | |||
| M | 1_SIN_SEÑALES | 10.0 | ||
| 2_ALARMA | 11.0 | |||
| ... | ... | ... | ... | ... |
| c_mayor_a_50 | 53 | F | 1_SIN_SEÑALES | 58.0 |
| 2_ALARMA | 65.0 | |||
| 3_GRAVE | 51.0 | |||
| M | 1_SIN_SEÑALES | 59.0 | ||
| 2_ALARMA | 58.0 |
941 rows × 1 columns
LevelByYear_medians.unstack(['sexo','enfermedad_text'])| edad | |||||||
|---|---|---|---|---|---|---|---|
| median | |||||||
| sexo | F | M | |||||
| enfermedad_text | 1_SIN_SEÑALES | 2_ALARMA | 3_GRAVE | 1_SIN_SEÑALES | 2_ALARMA | 3_GRAVE | |
| edad_grupos | semana | ||||||
| a_menor_a_16 | 1 | 9.0 | 9.0 | 5.0 | 10.0 | 11.0 | 11.0 |
| 2 | 10.0 | 10.0 | 12.0 | 9.0 | 10.5 | 7.0 | |
| 3 | 9.0 | 10.0 | 11.0 | 9.0 | 10.0 | 10.0 | |
| 4 | 9.0 | 10.0 | 7.0 | 9.0 | 10.0 | 8.0 | |
| 5 | 9.0 | 9.0 | 8.0 | 9.0 | 9.0 | 9.0 | |
| ... | ... | ... | ... | ... | ... | ... | ... |
| c_mayor_a_50 | 49 | 59.0 | 59.0 | 67.0 | 59.0 | 61.0 | 63.0 |
| 50 | 60.0 | 60.0 | 54.5 | 59.0 | 60.0 | 56.0 | |
| 51 | 58.0 | 58.5 | 60.0 | 60.0 | 62.0 | 61.0 | |
| 52 | 59.0 | 60.5 | 62.5 | 59.0 | 60.0 | 66.0 | |
| 53 | 58.0 | 65.0 | 51.0 | 59.0 | 58.0 | NaN | |
159 rows × 6 columns
Notice the multi-index:
LevelByYear_medians.info()<class 'pandas.core.frame.DataFrame'>
MultiIndex: 941 entries, ('a_menor_a_16', 1, 'F', '1_SIN_SEÑALES') to ('c_mayor_a_50', 53, 'M', '2_ALARMA')
Data columns (total 1 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 (edad, median) 941 non-null float64
dtypes: float64(1)
memory usage: 12.0+ KB
Other mark types are possible, although none of them reads better than the line charts:
# Base chart bound to the 2018 records.
alt_dengue=alt.Chart(dengue_2018)
# Weekly case counts on a log Y scale, one column of panels per severity label.
# 'semana' is an integer week number, not a timestamp, so it is encoded as
# quantitative; ':T' would make Vega-Lite read 1..53 as milliseconds since
# the Unix epoch.
enc_dengue=alt_dengue.encode(
    x='semana:Q',
    y=alt.Y('sum(case):Q', scale=alt.Scale(type='log')),
    column='enfermedad_text:N'
)
enc_dengue.mark_circle()/usr/local/lib/python3.10/dist-packages/altair/utils/core.py:384: FutureWarning: the convert_dtype parameter is deprecated and will be removed in a future version. Do ``ser.astype(object).apply()`` instead if you want ``convert_dtype=False``.
elif dtype_name == "string":
/usr/local/lib/python3.10/dist-packages/altair/utils/core.py:384: FutureWarning: the convert_dtype parameter is deprecated and will be removed in a future version. Do ``ser.astype(object).apply()`` instead if you want ``convert_dtype=False``.
elif dtype_name == "string":
/usr/local/lib/python3.10/dist-packages/altair/utils/core.py:384: FutureWarning: the convert_dtype parameter is deprecated and will be removed in a future version. Do ``ser.astype(object).apply()`` instead if you want ``convert_dtype=False``.
elif dtype_name == "string":
# Rebuild the same encoding (x = semana, y = log-scaled sum of 'case',
# faceted by 'enfermedad_text') before drawing it with a different mark.
alt_dengue = alt.Chart(dengue_2018)
enc_dengue = alt_dengue.encode(
    x=alt.X('semana:T'),
    y=alt.Y('sum(case):Q', scale=alt.Scale(type='log')),
    column=alt.Column('enfermedad_text:N'),
)
enc_dengue.mark_rule()/usr/local/lib/python3.10/dist-packages/altair/utils/core.py:384: FutureWarning: the convert_dtype parameter is deprecated and will be removed in a future version. Do ``ser.astype(object).apply()`` instead if you want ``convert_dtype=False``.
elif dtype_name == "string":
/usr/local/lib/python3.10/dist-packages/altair/utils/core.py:384: FutureWarning: the convert_dtype parameter is deprecated and will be removed in a future version. Do ``ser.astype(object).apply()`` instead if you want ``convert_dtype=False``.
elif dtype_name == "string":
/usr/local/lib/python3.10/dist-packages/altair/utils/core.py:384: FutureWarning: the convert_dtype parameter is deprecated and will be removed in a future version. Do ``ser.astype(object).apply()`` instead if you want ``convert_dtype=False``.
elif dtype_name == "string":
# Same channels once more: semana on x, log-scaled sum of 'case' on y,
# and one facet column per 'enfermedad_text' value.
alt_dengue = alt.Chart(dengue_2018)
enc_dengue = alt_dengue.encode(
    x=alt.X('semana:T'),
    y=alt.Y('sum(case):Q', scale=alt.Scale(type='log')),
    column=alt.Column('enfermedad_text:N'),
)
enc_dengue.mark_bar()/usr/local/lib/python3.10/dist-packages/altair/utils/core.py:384: FutureWarning: the convert_dtype parameter is deprecated and will be removed in a future version. Do ``ser.astype(object).apply()`` instead if you want ``convert_dtype=False``.
elif dtype_name == "string":
/usr/local/lib/python3.10/dist-packages/altair/utils/core.py:384: FutureWarning: the convert_dtype parameter is deprecated and will be removed in a future version. Do ``ser.astype(object).apply()`` instead if you want ``convert_dtype=False``.
elif dtype_name == "string":
/usr/local/lib/python3.10/dist-packages/altair/utils/core.py:384: FutureWarning: the convert_dtype parameter is deprecated and will be removed in a future version. Do ``ser.astype(object).apply()`` instead if you want ``convert_dtype=False``.
elif dtype_name == "string":
Let's do some aggregation:
# Grouping keys for the aggregation below.
indexList = ['edad_grupos', 'semana', 'sexo', 'enfermedad_text']
# Four summary statistics of 'edad' computed per group.
aggregator = {'edad': ['median', 'mean', 'min', 'max']}
LevelByYear_statsFull = (
    dengue
    .groupby(indexList, observed=True)
    .agg(aggregator)
)
LevelByYear_statsFull| edad | |||||||
|---|---|---|---|---|---|---|---|
| median | mean | min | max | ||||
| edad_grupos | semana | sexo | enfermedad_text | ||||
| a_menor_a_16 | 1 | F | 1_SIN_SEÑALES | 9.0 | 8.912214 | 1 | 15 |
| 2_ALARMA | 9.0 | 9.000000 | 1 | 15 | |||
| 3_GRAVE | 5.0 | 6.214286 | 2 | 15 | |||
| M | 1_SIN_SEÑALES | 10.0 | 9.153705 | 1 | 15 | ||
| 2_ALARMA | 11.0 | 9.564593 | 1 | 15 | |||
| ... | ... | ... | ... | ... | ... | ... | ... |
| c_mayor_a_50 | 53 | F | 1_SIN_SEÑALES | 58.0 | 60.319672 | 51 | 92 |
| 2_ALARMA | 65.0 | 65.444444 | 52 | 87 | |||
| 3_GRAVE | 51.0 | 51.000000 | 51 | 51 | |||
| M | 1_SIN_SEÑALES | 59.0 | 62.560440 | 51 | 90 | ||
| 2_ALARMA | 58.0 | 59.090909 | 51 | 78 | |||
941 rows × 4 columns
Now, some reshaping:
LevelByYear_statsFull.stack(future_stack=True)| edad | |||||
|---|---|---|---|---|---|
| edad_grupos | semana | sexo | enfermedad_text | ||
| a_menor_a_16 | 1 | F | 1_SIN_SEÑALES | median | 9.000000 |
| mean | 8.912214 | ||||
| min | 1.000000 | ||||
| max | 15.000000 | ||||
| 2_ALARMA | median | 9.000000 | |||
| ... | ... | ... | ... | ... | ... |
| c_mayor_a_50 | 53 | M | 1_SIN_SEÑALES | max | 90.000000 |
| 2_ALARMA | median | 58.000000 | |||
| mean | 59.090909 | ||||
| min | 51.000000 | ||||
| max | 78.000000 |
3764 rows × 1 columns
Let's use departamento and provincia:
# Total of 'case' per semana / departamento / provincia / enfermedad_text.
indexList = ['semana', 'departamento', 'provincia', 'enfermedad_text']
aggregator = {'case': ['sum']}
ByYearPlace = (
    dengue
    .groupby(indexList, observed=True)
    .agg(aggregator)
)
ByYearPlace| case | ||||
|---|---|---|---|---|
| sum | ||||
| semana | departamento | provincia | enfermedad_text | |
| 1 | AMAZONAS | BAGUA | 1_SIN_SEÑALES | 60 |
| 2_ALARMA | 7 | |||
| 3_GRAVE | 1 | |||
| CHACHAPOYAS | 1_SIN_SEÑALES | 10 | ||
| 2_ALARMA | 2 | |||
| ... | ... | ... | ... | ... |
| 53 | UCAYALI | CORONEL PORTILLO | 1_SIN_SEÑALES | 404 |
| 2_ALARMA | 47 | |||
| 3_GRAVE | 4 | |||
| PADRE ABAD | 1_SIN_SEÑALES | 18 | ||
| 2_ALARMA | 4 |
7157 rows × 1 columns
Create a wide shape:
#long to wide
ByYearPlace.unstack()| case | |||||
|---|---|---|---|---|---|
| sum | |||||
| enfermedad_text | 1_SIN_SEÑALES | 2_ALARMA | 3_GRAVE | ||
| semana | departamento | provincia | |||
| 1 | AMAZONAS | BAGUA | 60.0 | 7.0 | 1.0 |
| CHACHAPOYAS | 10.0 | 2.0 | NaN | ||
| CONDORCANQUI | 16.0 | 1.0 | NaN | ||
| UTCUBAMBA | 77.0 | 5.0 | NaN | ||
| ANCASH | CASMA | 14.0 | 2.0 | NaN | |
| ... | ... | ... | ... | ... | ... |
| 53 | TUMBES | TUMBES | 42.0 | 10.0 | NaN |
| ZARUMILLA | 4.0 | NaN | NaN | ||
| UCAYALI | ATALAYA | 4.0 | NaN | 1.0 | |
| CORONEL PORTILLO | 404.0 | 47.0 | 4.0 | ||
| PADRE ABAD | 18.0 | 4.0 | NaN | ||
3706 rows × 3 columns
# Move the innermost index level into the columns; combinations that do not
# exist in the data come back as NaN, so they are replaced with 0.
ByYearPlace_wide = ByYearPlace.unstack(level=-1).fillna(value=0)
ByYearPlace_wide| case | |||||
|---|---|---|---|---|---|
| sum | |||||
| enfermedad_text | 1_SIN_SEÑALES | 2_ALARMA | 3_GRAVE | ||
| semana | departamento | provincia | |||
| 1 | AMAZONAS | BAGUA | 60.0 | 7.0 | 1.0 |
| CHACHAPOYAS | 10.0 | 2.0 | 0.0 | ||
| CONDORCANQUI | 16.0 | 1.0 | 0.0 | ||
| UTCUBAMBA | 77.0 | 5.0 | 0.0 | ||
| ANCASH | CASMA | 14.0 | 2.0 | 0.0 | |
| ... | ... | ... | ... | ... | ... |
| 53 | TUMBES | TUMBES | 42.0 | 10.0 | 0.0 |
| ZARUMILLA | 4.0 | 0.0 | 0.0 | ||
| UCAYALI | ATALAYA | 4.0 | 0.0 | 1.0 | |
| CORONEL PORTILLO | 404.0 | 47.0 | 4.0 | ||
| PADRE ABAD | 18.0 | 4.0 | 0.0 | ||
3706 rows × 3 columns
The idea is to get the share of people in ALARM status. For that we need this:
# Row-wise total across all columns of the wide table.
sumCases = ByYearPlace_wide.sum(axis='columns')
sumCases| 0 | |||
|---|---|---|---|
| semana | departamento | provincia | |
| 1 | AMAZONAS | BAGUA | 68.0 |
| CHACHAPOYAS | 12.0 | ||
| CONDORCANQUI | 17.0 | ||
| UTCUBAMBA | 82.0 | ||
| ANCASH | CASMA | 16.0 | |
| ... | ... | ... | ... |
| 53 | TUMBES | TUMBES | 52.0 |
| ZARUMILLA | 4.0 | ||
| UCAYALI | ATALAYA | 5.0 | |
| CORONEL PORTILLO | 455.0 | ||
| PADRE ABAD | 22.0 |
3706 rows × 1 columns
# Fraction of each row's total that falls in the '2_ALARMA' column.
shareAlarma = ByYearPlace_wide[('case', 'sum', '2_ALARMA')] / sumCases
# Give the resulting Series an explicit name.
shareAlarma.name = 'shareAlarma'
shareAlarma| shareAlarma | |||
|---|---|---|---|
| semana | departamento | provincia | |
| 1 | AMAZONAS | BAGUA | 0.102941 |
| CHACHAPOYAS | 0.166667 | ||
| CONDORCANQUI | 0.058824 | ||
| UTCUBAMBA | 0.060976 | ||
| ANCASH | CASMA | 0.125000 | |
| ... | ... | ... | ... |
| 53 | TUMBES | TUMBES | 0.192308 |
| ZARUMILLA | 0.000000 | ||
| UCAYALI | ATALAYA | 0.000000 | |
| CORONEL PORTILLO | 0.103297 | ||
| PADRE ABAD | 0.181818 |
3706 rows × 1 columns
No multi index:
# Turn the index levels into ordinary columns; the result is a DataFrame
# with a default integer index.
shareAlarma = shareAlarma.reset_index(drop=False)
shareAlarma| semana | departamento | provincia | shareAlarma | |
|---|---|---|---|---|
| 0 | 1 | AMAZONAS | BAGUA | 0.102941 |
| 1 | 1 | AMAZONAS | CHACHAPOYAS | 0.166667 |
| 2 | 1 | AMAZONAS | CONDORCANQUI | 0.058824 |
| 3 | 1 | AMAZONAS | UTCUBAMBA | 0.060976 |
| 4 | 1 | ANCASH | CASMA | 0.125000 |
| ... | ... | ... | ... | ... |
| 3701 | 53 | TUMBES | TUMBES | 0.192308 |
| 3702 | 53 | TUMBES | ZARUMILLA | 0.000000 |
| 3703 | 53 | UCAYALI | ATALAYA | 0.000000 |
| 3704 | 53 | UCAYALI | CORONEL PORTILLO | 0.103297 |
| 3705 | 53 | UCAYALI | PADRE ABAD | 0.181818 |
3706 rows × 4 columns
Let's find the worst province per Region in a year:
# Row label of the largest 'shareAlarma' inside each (semana, departamento) group.
where = (
    shareAlarma
    .groupby(['semana', 'departamento'])['shareAlarma']
    .idxmax()
)
# Pull those rows out and renumber them from 0.
worst_prov_year = shareAlarma.loc[where]
worst_prov_year = worst_prov_year.reset_index(drop=True)
worst_prov_year| semana | departamento | provincia | shareAlarma | |
|---|---|---|---|---|
| 0 | 1 | AMAZONAS | CHACHAPOYAS | 0.166667 |
| 1 | 1 | ANCASH | CASMA | 0.125000 |
| 2 | 1 | AYACUCHO | HUANTA | 0.058824 |
| 3 | 1 | CAJAMARCA | CUTERVO | 1.000000 |
| 4 | 1 | CUSCO | QUISPICANCHI | 0.117647 |
| ... | ... | ... | ... | ... |
| 1006 | 53 | PASCO | OXAPAMPA | 0.000000 |
| 1007 | 53 | PIURA | PIURA | 0.222222 |
| 1008 | 53 | SAN MARTIN | EL DORADO | 0.500000 |
| 1009 | 53 | TUMBES | TUMBES | 0.192308 |
| 1010 | 53 | UCAYALI | PADRE ABAD | 0.181818 |
1011 rows × 4 columns
worst_prov_year.shareAlarma.describe()| shareAlarma | |
|---|---|
| count | 1011.000000 |
| mean | 0.190223 |
| std | 0.228024 |
| min | 0.000000 |
| 25% | 0.039749 |
| 50% | 0.124402 |
| 75% | 0.244315 |
| max | 1.000000 |
# amount of worst provinces per region
len(worst_prov_year.provincia.value_counts())88
# amount of worst provinces per region - cleaner
len(worst_prov_year[worst_prov_year.shareAlarma>0].provincia.value_counts())86
Some filtering:
# Keep only rows with a positive share, retain the two place columns,
# and renumber the rows from 0.
worst_ProvYear_alarma = (
    worst_prov_year
    .loc[worst_prov_year['shareAlarma'] > 0, ['departamento', 'provincia']]
    .reset_index(drop=True)
)
worst_ProvYear_alarma| departamento | provincia | |
|---|---|---|
| 0 | AMAZONAS | CHACHAPOYAS |
| 1 | ANCASH | CASMA |
| 2 | AYACUCHO | HUANTA |
| 3 | CAJAMARCA | CUTERVO |
| 4 | CUSCO | QUISPICANCHI |
| ... | ... | ... |
| 873 | MADRE DE DIOS | TAMBOPATA |
| 874 | PIURA | PIURA |
| 875 | SAN MARTIN | EL DORADO |
| 876 | TUMBES | TUMBES |
| 877 | UCAYALI | PADRE ABAD |
878 rows × 2 columns
# Number of rows per (departamento, provincia) pair.
indexList = ['departamento', 'provincia']
aggregator = {'provincia': ['count']}
worst_ProvYear_alarma_Frequency = (
    worst_ProvYear_alarma
    .groupby(indexList, observed=True)
    .agg(aggregator)
)
worst_ProvYear_alarma_Frequency| provincia | ||
|---|---|---|
| count | ||
| departamento | provincia | |
| AMAZONAS | BAGUA | 27 |
| CHACHAPOYAS | 6 | |
| CONDORCANQUI | 10 | |
| UTCUBAMBA | 10 | |
| ANCASH | CASMA | 24 |
| ... | ... | ... |
| TUMBES | ZARUMILLA | 7 |
| UCAYALI | ATALAYA | 23 |
| CORONEL PORTILLO | 14 | |
| PADRE ABAD | 15 | |
| PURUS | 1 |
86 rows × 1 columns
The count tells how many times (once per `semana` group) a province was the most affected:
worst_ProvYear_alarma_Frequency.describe()| provincia | |
|---|---|
| count | |
| count | 86.000000 |
| mean | 10.209302 |
| std | 10.496068 |
| min | 1.000000 |
| 25% | 2.250000 |
| 50% | 7.000000 |
| 75% | 13.750000 |
| max | 47.000000 |
# Replace the two-level column header with a single plain name.
# NOTE(review): the rows counted here come from groups keyed by 'semana',
# so this may be a count of weeks rather than years - confirm the naming.
worst_ProvYear_alarma_Frequency.columns = ['yearsAffected']
# Keep counts above 2, then move the index levels back into columns.
worst_ProvYear_alarma_Frequency = (
    worst_ProvYear_alarma_Frequency
    .loc[worst_ProvYear_alarma_Frequency['yearsAffected'] > 2]
    .reset_index()
)
worst_ProvYear_alarma_Frequency| departamento | provincia | yearsAffected | |
|---|---|---|---|
| 0 | AMAZONAS | BAGUA | 27 |
| 1 | AMAZONAS | CHACHAPOYAS | 6 |
| 2 | AMAZONAS | CONDORCANQUI | 10 |
| 3 | AMAZONAS | UTCUBAMBA | 10 |
| 4 | ANCASH | CASMA | 24 |
| ... | ... | ... | ... |
| 59 | TUMBES | TUMBES | 26 |
| 60 | TUMBES | ZARUMILLA | 7 |
| 61 | UCAYALI | ATALAYA | 23 |
| 62 | UCAYALI | CORONEL PORTILLO | 14 |
| 63 | UCAYALI | PADRE ABAD | 15 |
64 rows × 3 columns
Let's plot:
# Grid of provincia (x) against departamento (y); the 'yearsAffected' value
# supplies both the text shown and its size.
alt_worstProv = alt.Chart(worst_ProvYear_alarma_Frequency)
enc_worstProv = alt_worstProv.encode(
    x=alt.X('provincia'),
    y=alt.Y('departamento'),
    text=alt.Text('yearsAffected:O'),
    size=alt.Size('yearsAffected:O'),
)
enc_worstProv.mark_text()/usr/local/lib/python3.10/dist-packages/altair/utils/core.py:384: FutureWarning: the convert_dtype parameter is deprecated and will be removed in a future version. Do ``ser.astype(object).apply()`` instead if you want ``convert_dtype=False``.
elif dtype_name == "string":
Let's try another info:
# Same steps as the province-level table, one level coarser:
# sum 'case' per semana / departamento / enfermedad_text ...
indexList = ['semana', 'departamento', 'enfermedad_text']
aggregator = {'case': ['sum']}
ByYearDepa = (
    dengue
    .groupby(indexList, observed=True)
    .agg(aggregator)
)
# ... spread the innermost index level into columns, filling gaps with 0 ...
ByYearDepa_wide = ByYearDepa.unstack(level=-1).fillna(value=0)
# ... and divide the '2_ALARMA' column by each row's total.
ByYearDepaAlarm = (
    ByYearDepa_wide[('case', 'sum', '2_ALARMA')]
    / ByYearDepa_wide.sum(axis='columns')
)
ByYearDepaAlarm.name = 'alarmShare'
ByYearDepaAlarm = ByYearDepaAlarm.reset_index()
ByYearDepaAlarm| semana | departamento | alarmShare | |
|---|---|---|---|
| 0 | 1 | AMAZONAS | 0.083799 |
| 1 | 1 | ANCASH | 0.107143 |
| 2 | 1 | AYACUCHO | 0.046632 |
| 3 | 1 | CAJAMARCA | 0.073232 |
| 4 | 1 | CUSCO | 0.040179 |
| ... | ... | ... | ... |
| 1006 | 53 | PASCO | 0.000000 |
| 1007 | 53 | PIURA | 0.166667 |
| 1008 | 53 | SAN MARTIN | 0.138249 |
| 1009 | 53 | TUMBES | 0.173333 |
| 1010 | 53 | UCAYALI | 0.105809 |
1011 rows × 3 columns
ByYearDepaAlarm.describe()| semana | alarmShare | |
|---|---|---|
| count | 1011.000000 | 1011.000000 |
| mean | 26.845697 | 0.086449 |
| std | 15.337520 | 0.080580 |
| min | 1.000000 | 0.000000 |
| 25% | 14.000000 | 0.026726 |
| 50% | 27.000000 | 0.075601 |
| 75% | 40.000000 | 0.130126 |
| max | 53.000000 | 1.000000 |
ByYearDepaAlarm_focus=ByYearDepaAlarm[ByYearDepaAlarm.alarmShare>0]ByYearDepaAlarm_focus.describe()| semana | alarmShare | |
|---|---|---|
| count | 878.000000 | 878.000000 |
| mean | 26.109339 | 0.099544 |
| std | 15.305759 | 0.078568 |
| min | 1.000000 | 0.002370 |
| 25% | 13.000000 | 0.042359 |
| 50% | 25.000000 | 0.094045 |
| 75% | 39.000000 | 0.136035 |
| max | 53.000000 | 1.000000 |
# Bin edges for 'alarmShare'; pd.cut builds right-closed intervals between
# consecutive edges, and include_lowest=True also closes the first one on the left.
edges = [-1, .10, .25, .5, 1]
theLabels = ["a.below10%", "b.11-25%", "c.26-50%", "d.above50%"]
# ByYearDepaAlarm_focus was created by boolean filtering, so writing a column
# into it with .loc raises SettingWithCopyWarning. assign() returns a new
# DataFrame that carries the extra column, which avoids the warning.
ByYearDepaAlarm_focus = ByYearDepaAlarm_focus.assign(
    alarmLevels=pd.cut(
        ByYearDepaAlarm_focus['alarmShare'],
        bins=edges,
        labels=theLabels,
        include_lowest=True,
        ordered=True,
    )
)
##
ByYearDepaAlarm_focus.head()<ipython-input-57-af1b086842a6>:3: SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
ByYearDepaAlarm_focus.loc[:,"alarmLevels"]=pd.cut(ByYearDepaAlarm_focus['alarmShare'],
| semana | departamento | alarmShare | alarmLevels | |
|---|---|---|---|---|
| 0 | 1 | AMAZONAS | 0.083799 | a.below10% |
| 1 | 1 | ANCASH | 0.107143 | b.11-25% |
| 2 | 1 | AYACUCHO | 0.046632 | a.below10% |
| 3 | 1 | CAJAMARCA | 0.073232 | a.below10% |
| 4 | 1 | CUSCO | 0.040179 | a.below10% |
# Base chart: semana on x, departamento on y, with the y axis ordered by each
# departamento's maximum 'alarmShare', largest first.
alt_WorstDepa = alt.Chart(ByYearDepaAlarm_focus).encode(
    x='semana:O',
    y=alt.Y(
        'departamento:N',
        sort=alt.EncodingSortField(
            field='alarmShare',
            op='max',
            order='descending',
        ),
    ),
)
# First layer: colour each cell by its 'alarmLevels' category.
enc1_WorstDepa = alt_WorstDepa.encode(
    color=alt.Color(
        'alarmLevels:O',
        scale=alt.Scale(scheme="lightgreyred", reverse=False),
    ),
)
enc1_WorstDepa.mark_rect()/usr/local/lib/python3.10/dist-packages/altair/utils/core.py:384: FutureWarning: the convert_dtype parameter is deprecated and will be removed in a future version. Do ``ser.astype(object).apply()`` instead if you want ``convert_dtype=False``.
elif dtype_name == "string":
# Second layer: print 'alarmShare' with one decimal; the label is fully opaque
# when the share is at least 0.3 and fully transparent otherwise.
enc2_WorstDepa = alt_WorstDepa.encode(
    text=alt.Text('alarmShare:Q', format=".1f"),
    opacity=alt.condition(
        'datum.alarmShare >= 0.3',
        alt.value(1),
        alt.value(0),
    ),
)
enc2_WorstDepa.mark_text(fontStyle='bold')/usr/local/lib/python3.10/dist-packages/altair/utils/core.py:384: FutureWarning: the convert_dtype parameter is deprecated and will be removed in a future version. Do ``ser.astype(object).apply()`` instead if you want ``convert_dtype=False``.
elif dtype_name == "string":
enc1_WorstDepa.mark_rect() + enc2_WorstDepa.mark_text()/usr/local/lib/python3.10/dist-packages/altair/utils/core.py:384: FutureWarning: the convert_dtype parameter is deprecated and will be removed in a future version. Do ``ser.astype(object).apply()`` instead if you want ``convert_dtype=False``.
elif dtype_name == "string":
You can find different color schemes here